Raw R code and data visualization for CASA0005

The main purpose for this assignment’s report is to find out the spatial distribution of COVID-19 new cases in London borough and to quantify the effect of ‘Lockdown’ policy on the suppress of the increase of new cases based on statistical analysis and spatial autocorrelation.

Data visualization on map

#import library 
library(sf)
library(tmap)
library(tmaptools)
library(tidyverse)
library(here)
library(janitor)
library(RColorBrewer)
library(sp)
library(spdep)

After completing loading libraries, loading all datasets needed for the analysis:

#read all dataset for the analysis
#read London borough shapefile
Londonborough <-
  st_read(
    here::here(
      'data',
      'statistical-gis-boundaries-london',
      'ESRI',
      'London_Borough_Excluding_MHW.shp'
    )
  ) %>%
  st_transform(., 27700) #transform to 27700
## Reading layer `London_Borough_Excluding_MHW' from data source `/Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/statistical-gis-boundaries-london/ESRI/London_Borough_Excluding_MHW.shp' using driver `ESRI Shapefile'
## Simple feature collection with 33 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## projected CRS:  OSGB 1936 / British National Grid

Read csv file–>cumulative new cases in each time period, raw csv files are in Github repo

#cumulative new cases in 3.23-4.22 period for London boroughs
covid_323_422 <-
  read_csv(
    '/Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/new_cases-323-422.csv',
    na = c("NA", "n/a")
  ) %>%
  clean_names()
## 
## ─ Column specification ──────────────────────────────────────────
## cols(
##   area_code = col_character(),
##   new_cases = col_double(),
##   `population(10k)` = col_double(),
##   `new_cases per 10k population` = col_double()
## )
#cumulative new cases in 5.23-6.22 period for London boroughs
covid_523_622 <-
  read_csv(
    '/Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/new_cases-523-622.csv',
    na = c("NA", "n/a")
  ) %>%
  clean_names()
## 
## ─ Column specification ──────────────────────────────────────────
## cols(
##   area_code = col_character(),
##   new_cases = col_double(),
##   population_10k = col_double(),
##   `new_cases_per_10k_523-622` = col_double()
## )
names(covid_323_422) #present the names of data file
## [1] "area_code"                    "new_cases"                   
## [3] "population_10k"               "new_cases_per_10k_population"
names(covid_523_622)
## [1] "area_code"                 "new_cases"                
## [3] "population_10k"            "new_cases_per_10k_523_622"

Connect the CSV file data and shapefile

#join data with shapefile and new cases data in 3.23-4.22 period
Londonborough_merged <-
  Londonborough %>% left_join(covid_323_422, by = c('GSS_CODE' = 'area_code')) %>%
  distinct(GSS_CODE, NAME, new_cases,new_cases_per_10k_population) #choose which column to be used in the following analysis

#join data with shapefile and new cases data in 5.23-6.22 period
Londonborough_merged_523 <-
  Londonborough %>% left_join(covid_523_622, by = c('GSS_CODE' = 'area_code')) %>%
  distinct(GSS_CODE, NAME, new_cases, new_cases_per_10k_523_622) #choose which column to be used in the following analysis
#making maps
tmap_mode('view')
## tmap mode set to interactive viewing
breaks_323 = c(10, 15, 20, 25, 30, 35) 
tm1 <- tm_shape(Londonborough_merged) +
  tm_polygons(
    'new_cases_per_10k_population',
    breaks = breaks_323,
    alpha = 0.5,
    palette = 'PuBu',
    colorNA = 'white',
    title = 'New cases rate'
  ) + tm_layout(
    legend.position = c('left', 'bottom'),
    legend.outside = FALSE,
    legend.title.size = 1.2,
    legend.text.size = 0.75,
    frame = FALSE
  ) + tm_credits('(A) New cases rate in 3.23-4.22', position = c('left', 'top'), size = 1.2) + tm_compass(type = "arrow", position = c("right", "bottom")) +tm_scale_bar(position =c('right', 'bottom'), text.size = 0.6)
#view map 1
tm1
## Credits not supported in view mode.
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
#saving map
tmap_save(tm1,'/Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/new_cases_rate(323-422).png')
## Map saved to /Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/new_cases_rate(323-422).png
## Resolution: 2389.896 by 1845.268 pixels
## Size: 7.966321 by 6.150895 inches (300 dpi)
#making map 2
breaks_523=c(0,0.5,1,1.5,2,2.5,3)
tm2 <- tm_shape(Londonborough_merged_523) +
  tm_polygons(
    'new_cases_per_10k_523_622',
    breaks = breaks_523,
    alpha = 0.5,
    palette = 'PuBu',
    colorNA = 'white',
    title = 'New cases rate'
  ) + tm_layout(
    legend.position = c('left', 'bottom'),
    legend.outside = FALSE,
    legend.title.size = 1.2,
    legend.text.size = 0.75,
    frame = FALSE
  ) +
  tm_compass(type = "arrow", position = c("right", "bottom")) + tm_credits('(B) New cases rate in 5.23-6.22',
                                                                           position = c('left', 'top'),
                                                                           size =
                                                                             1.2) +
  tm_scale_bar(position = c('right', 'bottom'), text.size = 0.6)
tm2
## Credits not supported in view mode.
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
tmap_save(tm2,'/Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/new_cases_rate(523-622).png')
## Map saved to /Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/new_cases_rate(523-622).png
## Resolution: 2389.896 by 1845.268 pixels
## Size: 7.966321 by 6.150895 inches (300 dpi)
#combine two map
t=tmap_arrange(tm1,tm2,nrow=1)
tmap_save(t,'/Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/New_cases_rate.png')
## Map saved to /Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/New_cases_rate.png
## Resolution: 2100 by 2100 pixels
## Size: 7 by 7 inches (300 dpi)
t
## Credits not supported in view mode.
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Credits not supported in view mode.
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.

###Global Moran

#data cleaning for 3.23-4.22 data
london_contain_city <-
  na.omit(Londonborough_merged) #exclude rows contain null data
#data cleaning for 5.23-6.22 data
london_contain_city_523 <- na.omit(Londonborough_merged_523)

#creating polygon for 323 dataset and 523 dataset
neibour_323 <- poly2nb(london_contain_city, queen = TRUE)
neibour_323[[1]]
## [1] 19 20 21 22
neibour_523 <- poly2nb(london_contain_city_523,queen=TRUE)
neibour_523[[1]]
## [1] 19 20 21 22
#assign weight matrix for each neighbouring polygon using row standardisation
lw_323 <- nb2listw(neibour_323, style = "W", zero.policy = TRUE) #each neighbour is assigned a quarter of total weight
lw_523 <- nb2listw(neibour_523,style='W',zero.policy = TRUE)

#computing global Moran's I 
moran.test(london_contain_city$new_cases_per_10k_population, lw_323)
## 
##  Moran I test under randomisation
## 
## data:  london_contain_city$new_cases_per_10k_population  
## weights: lw_323    
## 
## Moran I statistic standard deviate = 3.8101, p-value = 6.947e-05
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.41467411       -0.03125000        0.01369805
moran.test(london_contain_city_523$new_cases_per_10k_523_622,lw_523)
## 
##  Moran I test under randomisation
## 
## data:  london_contain_city_523$new_cases_per_10k_523_622  
## weights: lw_523    
## 
## Moran I statistic standard deviate = 3.1759, p-value = 0.000747
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.34070799       -0.03125000        0.01371722

###Local Moran visualization

#local moran value for data 3.23-4.22
local_moran_323 <- localmoran(london_contain_city$new_cases_per_10k_population, lw_323)
summary(local_moran_323)
##        Ii                E.Ii              Var.Ii            Z.Ii         
##  Min.   :-0.40332   Min.   :-0.03125   Min.   :0.1113   Min.   :-0.68068  
##  1st Qu.: 0.02446   1st Qu.:-0.03125   1st Qu.:0.1675   1st Qu.: 0.08188  
##  Median : 0.16183   Median :-0.03125   Median :0.2167   Median : 0.35322  
##  Mean   : 0.41467   Mean   :-0.03125   Mean   :0.2329   Mean   : 0.99968  
##  3rd Qu.: 0.74304   3rd Qu.:-0.03125   3rd Qu.:0.2988   3rd Qu.: 1.74509  
##  Max.   : 2.09011   Max.   :-0.03125   Max.   :0.4629   Max.   : 4.55659  
##    Pr(z > 0)        
##  Min.   :0.0000026  
##  1st Qu.:0.0404846  
##  Median :0.3619619  
##  Mean   :0.2834692  
##  3rd Qu.:0.4673719  
##  Max.   :0.7519639
#local moran value for data 5.23-6.22
local_moran_523 <- localmoran(london_contain_city_523$new_cases_per_10k_523_622,lw_523)
summary(local_moran_523)
##        Ii                 E.Ii              Var.Ii            Z.Ii        
##  Min.   :-0.269515   Min.   :-0.03125   Min.   :0.1114   Min.   :-0.5115  
##  1st Qu.:-0.006033   1st Qu.:-0.03125   1st Qu.:0.1677   1st Qu.: 0.0553  
##  Median : 0.179887   Median :-0.03125   Median :0.2170   Median : 0.3860  
##  Mean   : 0.340708   Mean   :-0.03125   Mean   :0.2332   Mean   : 0.8499  
##  3rd Qu.: 0.629741   3rd Qu.:-0.03125   3rd Qu.:0.2992   3rd Qu.: 1.4635  
##  Max.   : 1.543701   Max.   :-0.03125   Max.   :0.4635   Max.   : 3.8459  
##    Pr(z > 0)        
##  Min.   :0.0000601  
##  1st Qu.:0.0716624  
##  Median :0.3497394  
##  Mean   :0.3072030  
##  3rd Qu.:0.4779496  
##  Max.   :0.6954938
#plot local moran map
#There are 5 columns of data.
#copy some of the columns (the I score (column 1) and the z-score standard deviation (column 4))
#the z-score (standardised value relating to whether high values or low values are clustering together)
#change local_moran type
local_moran_tibble_323 <- as_tibble(local_moran_323) #change to dataframe
local_moran_tibble_523 <- as_tibble(local_moran_523)

london_contain_city <-
  london_contain_city %>% mutate(local_moran_I = as.numeric(local_moran_tibble_323$Ii)) %>% mutate(local_moran_z =as.numeric(local_moran_tibble_323$Z.Ii))

london_contain_city_523 <-
  london_contain_city_523 %>% mutate(local_moran_I = as.numeric(local_moran_tibble_523$Ii)) %>% mutate(local_moran_z =
                                                                                                         as.numeric(local_moran_tibble_523$Z.Ii))

#setting color
MoranColours <- rev(brewer.pal(8, "RdBu"))

#plot a map (in Rmd documend, tmap mode changes to viewing, but raw code is plotting mode)
tmap_mode('view')
## tmap mode set to interactive viewing
#plotting local moran map for 3.23-4.22
tm_323 <- tm_shape(london_contain_city) +
  tm_polygons(
    'local_moran_I',
    alpha = 0.5,
    palette = MoranColours,
    title = 'Local Moran I',midpoint=NA
  ) + tm_layout(
    legend.position = c('left', 'bottom'),
    legend.outside = FALSE,
    legend.title.size = 1.2,
    legend.text.size = 0.75,
    frame = FALSE
  ) + tm_compass(type = "arrow", position = c("right", "bottom")) +tm_scale_bar(position =
                                                                                  c('right', 'bottom'), text.size = 0.6)+tm_credits('(A) Local Moran in 3.23-4.22',position = c('left','top'),size = 1.2)

tm_323
## Credits not supported in view mode.
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
tmap_save(tm_323,'/Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/local_moran_323.png')
## Map saved to /Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/local_moran_323.png
## Resolution: 2389.896 by 1845.268 pixels
## Size: 7.966321 by 6.150895 inches (300 dpi)
#plotting local moran map for 5.23-6.22 dataset
tm_523 <- tm_shape(london_contain_city_523) +
  tm_polygons(
    'local_moran_I',
    alpha = 0.5,
    palette = MoranColours,
    title = 'Local Moran I',midpoint=NA
  ) + tm_layout(
    legend.position = c('left', 'bottom'),
    legend.outside = FALSE,
    legend.title.size = 1.2,
    legend.text.size = 0.75,
    frame = FALSE
  ) +tm_compass(type = "arrow", position = c("right", "bottom")) +tm_scale_bar(position =
                                                                                 c('right', 'bottom'), text.size = 0.6)+tm_credits('(B) Local Moran in 5.23-6.22',position = c('left','top'),size = 1.2)

tm_523
## Credits not supported in view mode.
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
tmap_save(tm_523,'/Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/local_moran_523.png')
## Map saved to /Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/local_moran_523.png
## Resolution: 2389.896 by 1845.268 pixels
## Size: 7.966321 by 6.150895 inches (300 dpi)
#combine two map
t_c=tmap_arrange(tm_323,tm_523,nrow=1)
t_c
## Credits not supported in view mode.
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Credits not supported in view mode.
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
tmap_save(t_c,'/Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/local_moran_combined.png')
## Map saved to /Users/ziyichen/Desktop/SDSV/GIS/Data_for_CW/Code&data/data/local_moran_combined.png
## Resolution: 2100 by 2100 pixels
## Size: 7 by 7 inches (300 dpi)

###LISA clusters ploting

quadrant_323 <- vector(mode='numeric',length=nrow(london_contain_city))

#centers the variable of interest around its mean
m.new_cases_rate <- london_contain_city$new_cases_per_10k_population-mean(london_contain_city$new_cases_per_10k_population)

#centers the local moran's around the mean
m.local_323 <- local_moran_323[,1]-mean(local_moran_323[,1])

#significance threshold
signif <- 0.05

#builds a data quadrant
quadrant_323[m.new_cases_rate>0 & m.local_323>0] <- 4
quadrant_323[m.new_cases_rate<0 & m.local_323<0] <- 1
quadrant_323[m.new_cases_rate<0 & m.local_323>0] <- 2
quadrant_323[m.new_cases_rate>0 & m.local_323<0] <- 3
quadrant_323[local_moran_323[,5]>signif] <- 0

#plotting in r
brks <- c(0,1,2,3,4)
colors <- c('white','blue',rgb(0,0,1,alpha=0.4),rgb(1,0,0,alpha=0.4),'red')
plot(london_contain_city,border='lightgray',col=colors[findInterval(quadrant_323,brks,all.inside = FALSE)],max.plot=1,main='LISA cluster')

legend('bottomright',legend=c('insignificant',
                             'low-low','low-high',
                             'high-low','high-high'),fill=colors,bty='n')

#for 5.23-6.22 data
quadrant_523 <- vector(mode='numeric',length=nrow(london_contain_city_523))
m.new_cases_rate_523 <- london_contain_city_523$new_cases_per_10k_523_622-mean(london_contain_city_523$new_cases_per_10k_523_622)

m.local_523 <- local_moran_523[,1]-mean(local_moran_523[,1])
quadrant_523[m.new_cases_rate_523>0 & m.local_523>0] <- 4
quadrant_523[m.new_cases_rate_523<0 & m.local_523<0] <- 1
quadrant_523[m.new_cases_rate_523<0 & m.local_523>0] <- 2
quadrant_523[m.new_cases_rate_523>0 & m.local_523<0] <- 3
quadrant_523[local_moran_523[,5]>signif] <- 0


plot(london_contain_city_523,border='lightgray',col=colors[findInterval(quadrant_523,brks,all.inside = FALSE)],max.plot=1,main='LISA cluster')
legend('bottomright',legend=c('insignificant',
                              'low-low','low-high',
                              'high-low','high-high'),fill=colors,bty='n')

###Geary’s test result

#geary's test
#geary's test for 3.23-4.22 data
geary.test(london_contain_city$new_cases_per_10k_population, lw_323)
## 
##  Geary C test under randomisation
## 
## data:  london_contain_city$new_cases_per_10k_population 
## weights: lw_323 
## 
## Geary C statistic standard deviate = 3.5207, p-value = 0.0002152
## alternative hypothesis: Expectation greater than statistic
## sample estimates:
## Geary C statistic       Expectation          Variance 
##        0.58221904        1.00000000        0.01408156
#geary's test for 5.23-6.22 data
geary.test(london_contain_city_523$new_cases_per_10k_523_622,lw_523)
## 
##  Geary C test under randomisation
## 
## data:  london_contain_city_523$new_cases_per_10k_523_622 
## weights: lw_523 
## 
## Geary C statistic standard deviate = 2.6398, p-value = 0.004148
## alternative hypothesis: Expectation greater than statistic
## sample estimates:
## Geary C statistic       Expectation          Variance 
##        0.68693345        1.00000000        0.01406505

###Getis Ord General G result

#Getis Ord General G for 3.23-4.22
globalG.test(london_contain_city$new_cases_per_10k_population,lw_323)
## Warning in globalG.test(london_contain_city$new_cases_per_10k_population, :
## Binary weights recommended (sepecially for distance bands)
## 
##  Getis-Ord global G statistic
## 
## data:  london_contain_city$new_cases_per_10k_population 
## weights: lw_323 
## 
## standard deviate = 2.5545, p-value = 0.005316
## alternative hypothesis: greater
## sample estimates:
## Global G statistic        Expectation           Variance 
##       3.230196e-02       3.125000e-02       1.695799e-07
#Getis Ord General G for 5.23-6.22
globalG.test(london_contain_city_523$new_cases_per_10k_523_622,lw_523)
## Warning in globalG.test(london_contain_city_523$new_cases_per_10k_523_622, :
## Binary weights recommended (sepecially for distance bands)
## 
##  Getis-Ord global G statistic
## 
## data:  london_contain_city_523$new_cases_per_10k_523_622 
## weights: lw_523 
## 
## standard deviate = 1.648, p-value = 0.04968
## alternative hypothesis: greater
## sample estimates:
## Global G statistic        Expectation           Variance 
##       3.244483e-02       3.125000e-02       5.256448e-07

###Descriptive statistics summary

library(ggplot2)
#histrogram
#only use rate of new cases to plot histrogram
covid_323_hist <- hist(covid_323_422$new_cases_per_10k_population,col='Dark blue',density=10,xlab='Rate of new cases',main='Rate of new cases during 3.23-4.22',angle=45,ylim=c(0,15))

mean_323 <- mean(covid_323_422$new_cases_per_10k_population) #mean value
var_323 <- var(covid_323_422$new_cases_per_10k_population) #variance value
std_323 <- sd(covid_323_422$new_cases_per_10k_population) #standard deviation
mean_323
## [1] 23.41515
var_323
## [1] 25.54008
std_323
## [1] 5.053719
covid_523_hist <- hist(covid_523_622$new_cases_per_10k_523_622,col='Dark blue',density=10,xlab='Rate of new cases',main='Rate of new cases during 5.23-6.22',angle=45,ylim=c(0,15))

mean_523 <- mean(covid_523_622$new_cases_per_10k_523_622)
var_523 <- var(covid_523_622$new_cases_per_10k_523_622)
std_523 <- sd(covid_523_622$new_cases_per_10k_523_622)
mean_523
## [1] 1.724242
var_523
## [1] 0.3450189
std_523
## [1] 0.5873831